DATA VISUALISATION PROJECT

In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

%matplotlib inline
In [ ]:
flights = pd.read_csv('/content/flights_data.csv')
print(flights.shape)
flights.head()
(10683, 11)
Out[ ]:
Airline Date_of_Journey Source Destination Route Dep_Time Arrival_Time Duration(minutes) Total_Stops Additional_Info Price
0 IndiGo 24/03/2019 Banglore New Delhi BLR → DEL 22:20 01:10 22 Mar 170 0 No info 3897
1 Air India 1/05/2019 Kolkata Banglore CCU → IXR → BBI → BLR 05:50 13:15 445 2 No info 7662
2 Jet Airways 9/06/2019 Delhi Cochin DEL → LKO → BOM → COK 09:25 04:25 10 Jun 1140 2 No info 13882
3 IndiGo 12/05/2019 Kolkata Banglore CCU → NAG → BLR 18:05 23:30 325 1 No info 6218
4 IndiGo 01/03/2019 Banglore New Delhi BLR → NAG → DEL 16:50 21:35 285 1 No info 13302

BAR CHARTS

In [ ]:
sb.countplot(data = flights, x = 'Source')
#plt.xticks(rotation=30)
plt.ylabel('Number of Flights',fontsize=12)
plt.xlabel('Source',fontsize=12)
Out[ ]:
Text(0.5, 0, 'Source')
In [ ]:
base_color = sb.color_palette()[0]
sb.countplot(data = flights, x = 'Source', color = base_color)
plt.xticks(rotation=30)
Out[ ]:
(array([0, 1, 2, 3, 4]), <a list of 5 Text major ticklabel objects>)
In [ ]:
base_color = sb.color_palette()[1]
gen_order = flights['Source'].value_counts().index
sb.countplot(data = flights, x = 'Source', color = base_color, 
              order = gen_order)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fc451932790>
In [ ]:
base_color = sb.color_palette()[2]
sb.countplot(data = flights, x = 'Airline', color = base_color)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fc451384990>
In [ ]:
base_color = sb.color_palette()[2]
sb.countplot(data = flights, x = 'Airline', color = base_color)
plt.xticks(rotation=90);
In [ ]:
base_color = sb.color_palette()[2]
sb.countplot(data = flights, x = 'Airline', color = base_color)
plt.xticks(rotation=90);
In [ ]:
base_color =sb.color_palette()[2]
sb.countplot(data=flights,y='Airline',color=base_color)
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fc45115cc50>

COUNT MISSING DATA

In [ ]:
flights.isna().sum()
Out[ ]:
Airline               0
Date_of_Journey       0
Source                0
Destination           0
Route                16
Dep_Time              0
Arrival_Time          0
Duration(minutes)     0
Total_Stops          27
Additional_Info       0
Price                 0
dtype: int64
In [ ]:
na_counts=flights.isna().sum()
base_color=sb.color_palette()[0]
sb.barplot(na_counts.index.values,na_counts,color=base_color)
plt.xticks(rotation=90)
plt.ylabel('Number of missing values',fontsize=12)
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  FutureWarning
Out[ ]:
Text(0, 0.5, 'Number of missing values')

PIE CHARTS:bold text

In [ ]:
sorted_counts=flights['Destination'].value_counts()
plt.pie(sorted_counts,labels=sorted_counts.index, startangle=90, counterclock=False);
plt.axis('square')
plt.title('Flight Destination\'s')
Out[ ]:
Text(0.5, 1.0, "Flight Destination's")
In [ ]:
sorted_counts= flights['Destination'].value_counts()
plt.pie(sorted_counts,labels=sorted_counts.index, startangle=90, counterclock=False, wedgeprops={'width':0.4});
plt.axis('square');

HISTOGRAMS

In [ ]:
plt.hist(data =flights, x='Duration(minutes)')
Out[ ]:
(array([4.198e+03, 1.988e+03, 1.720e+03, 7.870e+02, 9.310e+02, 8.450e+02,
        1.320e+02, 7.000e+01, 1.000e+01, 2.000e+00]),
 array([  75. ,  353.5,  632. ,  910.5, 1189. , 1467.5, 1746. , 2024.5,
        2303. , 2581.5, 2860. ]),
 <a list of 10 Patch objects>)
In [ ]:
plt.hist(data=flights, x='Price',bins =20)
Out[ ]:
(array([ 247., 1218., 1350., 1123., 1228.,  769.,  964.,  832.,  886.,
         789.,  735.,  195.,  109.,   76.,   18.,   27.,   24.,   10.,
          15.,   68.]),
 array([ 1759.  ,  3021.05,  4283.1 ,  5545.15,  6807.2 ,  8069.25,
         9331.3 , 10593.35, 11855.4 , 13117.45, 14379.5 , 15641.55,
        16903.6 , 18165.65, 19427.7 , 20689.75, 21951.8 , 23213.85,
        24475.9 , 25737.95, 27000.  ]),
 <a list of 20 Patch objects>)
In [ ]:
bins = np.arange(0 ,flights['Price'].max()+1, 1200)
plt.hist(data=flights , x='Price',bins=bins)
plt.show()
In [ ]:
sb.distplot(flights['Price']);
/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
In [ ]:
sb.distplot(flights['Price'],kde=False);
/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
In [ ]:
bin_edges=np.arange(0 ,flights['Price'].max()+1,1200)
sb.distplot(flights['Price'],bins=bin_edges,kde=False,hist_kws={'alpha':1});
/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2557: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)